#!/usr/bin/env Rscript

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(effsize))

# Load the external and internal response tables ----
# Both CSV files are read with the same column layout, so the column
# specification is declared once and applied to each file.
response_cols <- cols(
  issue = col_character(),
  category = col_character(),
  tool = col_character(),
  question = col_character(),
  participant = col_character(),
  response = col_double()
)

# Tag every row with the file it came from so the origin is still known
# after the two tables are stacked.
external <- read_csv("./external.csv", col_types = response_cols) |>
  mutate(type = "external")
internal <- read_csv("./internal.csv", col_types = response_cols) |>
  mutate(type = "internal")

# Combined table: internal rows first, followed by external rows.
data <- bind_rows(internal, external)

# # Is there a difference between the tool's performance in convincing users that an issue
# # exists and its performance in guiding them to the fix?

# errcheck <- data %>% filter(tool=="errcheck")
# wilcox.test(response~question, errcheck, exact=FALSE)
# gosec <- data %>% filter(tool=="gosec")
# wilcox.test(response~question, gosec, exact=FALSE)
# VD.A(response~question, gosec)
# revive <- data %>% filter(tool=="revive")
# wilcox.test(response~question, revive, exact=FALSE)
# VD.A(response~question, revive)
# staticcheck <- data %>% filter(tool=="staticcheck")
# wilcox.test(response~question, staticcheck, exact=FALSE)
# VD.A(response~question, staticcheck)



# # Is there a difference in a tool's performance between categories?
  
# errcheck_convincing <- data |> filter(tool=="errcheck") |> filter(question=="convincing")
# pairwise.wilcox.test(errcheck_convincing$response, errcheck_convincing$category, exact=FALSE, p.adj = "bonf")
# gosec_convincing <- data |> filter(tool=="gosec") |> filter(question=="convincing")
# pairwise.wilcox.test(gosec_convincing$response, gosec_convincing$category, exact=FALSE, p.adj = "bonf")
# revive_convincing <- data |> filter(tool=="revive") |> filter(question=="convincing")
# pairwise.wilcox.test(revive_convincing$response, revive_convincing$category, exact=FALSE, p.adj = "bonf")
# VD.A(response~category, revive_convincing |> filter(category==1 | category==5))
# staticcheck_convincing <- data |> filter(tool=="staticcheck") |> filter(question=="convincing")
# pairwise.wilcox.test(staticcheck_convincing$response, staticcheck_convincing$category, exact=FALSE, p.adj = "bonf")

# errcheck_guiding <- data |> filter(tool=="errcheck") |> filter(question=="guiding")
# pairwise.wilcox.test(errcheck_guiding$response, errcheck_guiding$category, exact=FALSE, p.adj = "bonf")
# gosec_guiding <- data |> filter(tool=="gosec") |> filter(question=="guiding")
# pairwise.wilcox.test(gosec_guiding$response, gosec_guiding$category, exact=FALSE, p.adj = "bonf")
# revive_guiding <- data |> filter(tool=="revive") |> filter(question=="guiding")
# pairwise.wilcox.test(revive_guiding$response, revive_guiding$category, exact=FALSE, p.adj = "bonf")
# staticcheck_guiding <- data |> filter(tool=="staticcheck") |> filter(question=="guiding")
# pairwise.wilcox.test(staticcheck_guiding$response, staticcheck_guiding$category, exact=FALSE, p.adj = "bonf")



# Most frequent opinion per question and tool ----
# Responses are collapsed into three opinion buckets (1-2 negative, 3 neutral,
# 4-5 positive); any other value, including NA, lands in an NA bucket. The
# buckets are tallied per tool and question, and for every question/tool pair
# only the bucket(s) with the highest tally are printed (ties are all kept).
data |>
  mutate(
    opinion = case_when(
      response %in% c(1, 2) ~ "negative",
      response %in% 3 ~ "neutral",
      response %in% c(4, 5) ~ "positive"
    )
  ) |>
  # One row per tool/question/opinion combination, with its frequency in `n`.
  count(tool, question, opinion) |>
  # Compare tallies within each question/tool pair.
  group_by(question, tool) |>
  filter(n == max(n)) |>
  arrange(question, tool) |>
  # n = Inf prints every row instead of the default truncated view.
  print(n = Inf)